clear



capture erase "$P_Data_Processed/broadband_speed.dta"

clear

gener square = ""
gener x_coordinate =.
gener y_coordinate =.
gener network_type = ""
gener n_measurements = .
gener year = . 

save "$P_Data_Processed/broadband_speed.dta", replace

** import all data from 2016
local FileList: dir "$P_Data/Germany Broadband Speed Data/2016 CSVs/" files "*.csv"


cd "$P_Data/Germany Broadband Speed Data/2016 CSVs/" 
foreach file of local FileList {
	clear
	import delimited "`file'"

	rename v1 square
	rename v2 x_coordinate
	rename v3 y_coordinate
	rename v4 network_type
	drop v5
	rename v6 download_speed
	rename v7 upload_speed
	rename v8 ping_time
	rename v9 n_measurements
	
	gener year = 2016
	gener month = "`file'"
	replace month = subinstr(month,"OD_EU_Broadband_1km_2016_","",.)
	replace month = subinstr(month,".csv","",.)
	destring month, replace force
		
	
	append using "$P_Data_Processed/broadband_speed.dta"
	save "$P_Data_Processed/broadband_speed.dta", replace
	
}

** import all data from 2017
local FileList: dir "$P_Data/Germany Broadband Speed Data/2017 CSVs/" files "*.csv"


cd "$P_Data/Germany Broadband Speed Data/2017 CSVs/" 
foreach file of local FileList {
	clear
	import delimited "`file'"

	rename v1 square
	rename v2 x_coordinate
	rename v3 y_coordinate
	rename v4 network_type
	drop v5
	rename v6 download_speed
	rename v7 upload_speed
	rename v8 ping_time
	rename v9 n_measurements
	
	gener year = 2017
	gener month = "`file'"
	replace month = subinstr(month,"OD_EU_Broadband_1km_2017_","",.)
	replace month = subinstr(month,".csv","",.)
	destring month, replace force	
	
	append using "$P_Data_Processed/broadband_speed.dta"
	save "$P_Data_Processed/broadband_speed.dta", replace
	
}

** import all data from 2018
local FileList: dir "$P_Data/Germany Broadband Speed Data/2018 CSVs/" files "*.csv"


cd "$P_Data/Germany Broadband Speed Data/2018 CSVs/" 
foreach file of local FileList {
	clear
	import delimited "`file'"

	rename v1 square
	rename v2 x_coordinate
	rename v3 y_coordinate
	rename v4 network_type
	drop v5
	rename v6 download_speed
	rename v7 upload_speed
	rename v8 ping_time
	rename v9 n_measurements
	
	gener year = 2018
	gener month = "`file'"
	replace month = subinstr(month,"OD_EU_Broadband_1km_2018_","",.)
	replace month = subinstr(month,".csv","",.)
	destring month, replace force	
	
	append using "$P_Data_Processed/broadband_speed.dta"
	save "$P_Data_Processed/broadband_speed.dta", replace
	
}

save "$P_Data_Processed/broadband_speed.dta", replace

sort square year
by square: keep if _n==1
keep square x_coordinate y_coordinate  
outsheet using "$P_Data/Germany Broadband Speed Data/grid_coordinates.csv", comma nolabel replace

**
** now do stuff in QGIS  - basically create a spatial match between grid coordinates and NUTS2/NUTS3 regions  **
** output a file called grid_NUTS3_correspondence.csv that has grid points, NUTS2 and NUTS3 names **
** FILE IS PROVIDED **

** create grid coordinates/ NUTS2/3 correspondence
clear
insheet using "$P_Data/Germany Broadband Speed Data/grid_NUTS3_correspondence.csv"
keep if cntr_code=="DE"
drop nuts_name
drop x_coordinate y_coordinate
rename nuts_id nuts3
rename fid fid_nuts3
drop levl_code
drop cntr_code

save "$P_Data_Processed/grid_NUTS_correspondence.dta",replace

clear 
insheet using "$P_Data/Germany Broadband Speed Data/grid_NUTS2_correspondence.csv"
keep if cntr_code=="DE"
drop nuts_name
drop x_coordinate y_coordinate
rename nuts_id nuts2
rename fid fid_nuts2
drop levl_code
drop cntr_code
drop mount_type
drop urbn_type
drop coast_type 

merge 1:1 square using  "$P_Data_Processed/grid_NUTS_correspondence.dta"
drop _merge

save "$P_Data_Processed/grid_NUTS_correspondence.dta", replace

**


** now put everything back together 
clear
use "$P_Data_Processed/broadband_speed.dta"
merge m:1 square using "$P_Data_Processed/grid_NUTS_correspondence.dta"
keep if _merge==3
drop _merge


replace download_speed = download_speed/1000
replace upload_speed = upload_speed/1000

gegen tot_n_measurements = total(n_measurements), by(nuts2 network_type year )
gener down_speed_weight = download_speed*n_measurements/tot_n_measurements
gener up_speed_weight = upload_speed*n_measurements/tot_n_measurements
gener ping_weight = ping_time*n_measurements/tot_n_measurements

gegen avg_down_speed = total(down_speed_weight), by(nuts2 network_type year )
gegen avg_up_speed = total(up_speed_weight), by(nuts2 network_type year )
gegen avg_ping_speed = total(ping_weight), by(nuts2 network_type year )


sort nuts2 network_type year month square
by nuts2 network_type year: egen down_speed_5 = pctile(download_speed), p(5)
by nuts2 network_type year: egen down_speed_25 = pctile(download_speed), p(25)
by nuts2 network_type year: egen down_speed_50 = pctile(download_speed), p(50)
by nuts2 network_type year: egen down_speed_75 = pctile(download_speed), p(75)
by nuts2 network_type year: egen down_speed_95 = pctile(download_speed), p(95)

by nuts2 network_type year: egen up_speed_5 = pctile(upload_speed), p(5)
by nuts2 network_type year: egen up_speed_25 = pctile(upload_speed), p(25)
by nuts2 network_type year: egen up_speed_50 = pctile(upload_speed), p(50)
by nuts2 network_type year: egen up_speed_75 = pctile(upload_speed), p(75)
by nuts2 network_type year: egen up_speed_95 = pctile(upload_speed), p(95)


gcollapse avg_down_speed avg_up_speed avg_ping_speed down_speed_5 down_speed_25 down_speed_50 down_speed_75 down_speed_95 up_speed_50 up_speed_75 up_speed_95  tot_n_measurements , by(nuts2 network_type year )
save "$P_Data_Processed/broadband_speed.dta", replace

* saving wifi results 
keep if network_type=="Wifi"
rename avg_down_speed avg_down_speed_wifi
rename avg_up_speed avg_up_speed_wifi
rename avg_ping_speed avg_ping_speed_wifi
rename tot_n_measurements tot_n_meas_wifi
drop network_type

gener mbps_10_available = (avg_down_speed_wifi>10) 
gener mbps_15_available = (avg_down_speed_wifi>15) 
gener mbps_30_available = (avg_down_speed_wifi>30) 
gener mbps_50_available = (avg_down_speed_wifi>50) 

sort nuts2 year
by nuts2: replace mbps_10_available = 1 if mbps_10_available[_n-1]==1 & _n>1
by nuts2: replace mbps_15_available = 1 if mbps_15_available[_n-1]==1 & _n>1
by nuts2: replace mbps_30_available = 1 if mbps_30_available[_n-1]==1 & _n>1
by nuts2: replace mbps_50_available = 1 if mbps_50_available[_n-1]==1 & _n>1


gener at_least_10mbps = 0
replace at_least_10mbps = 1 if mbps_10_available==1 | mbps_15_available==1 | mbps_30_available==1

keep nuts2 year at_least_10mbps avg_down_speed avg_up_speed tot_n_meas_wifi 

save "$P_Data_Processed/broadband_speed.dta", replace
